package com.techblor.chinesehelper;

import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.text.Normalizer;
import java.util.ArrayList;
import java.util.List;

/**
 * Chinese character dictionary. Looks up the pinyin, the radical and the stroke
 * sequence of a character by its Unicode code point. Only the range U+4E00 to
 * U+9FA5 (20902 characters) is covered; every other character is treated as
 * "not Chinese" by this class.
 * 
 * @author zhangceven
 *
 */
public class ChineseHelper {

	/** Smallest code point handled by the dictionary (U+4E00). */
	public static final char CHINESE_CHARACTER_MIN = '\u4E00';

	/** Largest code point handled by the dictionary (U+9FA5). */
	public static final char CHINESE_CHARACTER_MAX = '\u9FA5';

	/** Name of the dictionary resource, resolved relative to this class. */
	public static final String DICTIONARY_DATA_FILE = "data.txt";

	/** Character encoding of the dictionary resource. */
	public static final String FILE_CHARSET = "utf-8";

	/**
	 * Dictionary records, one per character, indexed by
	 * {@code character - CHINESE_CHARACTER_MIN}. An entry is {@code null} when
	 * the resource could not be loaded or had fewer lines than the table.
	 */
	public static final String[] CHINESE_CHARACTER_DATA = new String[CHINESE_CHARACTER_MAX - CHINESE_CHARACTER_MIN + 1];

	/**
	 * Selector for the tone-marked pinyin spelling. Kept for API compatibility;
	 * this class no longer indexes data with it.
	 */
	public static final int INDEX_PINYIN_CHINESE = 0;

	/**
	 * Selector for the plain-letter pinyin spelling. Kept for API
	 * compatibility; this class no longer indexes data with it.
	 */
	public static final int INDEX_PINYIN_ENGLISH = 1;

	/** Position of the pinyin field inside a '|'-separated record. */
	public static final int INDEX_PINYIN = 0;
	
	/** Position of the radical (component) field inside a '|'-separated record. */
	public static final int INDEX_CHARACTER_COMPONENT = 1;

	/** Position of the stroke-sequence field inside a '|'-separated record. */
	public static final int INDEX_STROKES = 2;

	static {
		try {
			loadChineseCharaterDictionaryData();
		} catch (Exception e) {
			// Best effort: the class stays usable, lookups simply return empty results.
			System.err.println("加载中文字典数据错误: " + e.getMessage());
			if (e.getCause() != null) {
				System.err.println("Caused by: " + e.getCause());
			}
		}
	}

	/**
	 * Reads the dictionary resource line by line into
	 * {@link #CHINESE_CHARACTER_DATA}. Lines beyond the capacity of the table
	 * are ignored.
	 * 
	 * @throws Exception
	 *             if the resource is missing (an {@link IOException}) or cannot
	 *             be read (the underlying I/O error is attached as the cause)
	 */
	private static void loadChineseCharaterDictionaryData() throws Exception {

		InputStream inputStream = ChineseHelper.class.getResourceAsStream(DICTIONARY_DATA_FILE);

		if (inputStream == null) {
			throw new IOException(DICTIONARY_DATA_FILE + "中文字典数据文件不存在!");
		}

		// The stream is declared first so it is closed even if creating the reader fails.
		try (InputStream in = inputStream;
				BufferedReader reader = new BufferedReader(new InputStreamReader(in, FILE_CHARSET))) {
			String line;
			int index = 0;
			while (index < CHINESE_CHARACTER_DATA.length && (line = reader.readLine()) != null) {
				CHINESE_CHARACTER_DATA[index++] = line;
			}
		} catch (IOException e) {
			throw new Exception("读取中文字典数据错误", e);
		}

	}

	/**
	 * Tells whether the character lies in the range covered by the dictionary.
	 * 
	 * @param value
	 *            character to test
	 * @return {@code true} if {@code value} is between
	 *         {@link #CHINESE_CHARACTER_MIN} and {@link #CHINESE_CHARACTER_MAX},
	 *         inclusive
	 */
	private static boolean isChineseCharacter(char value) {
		return value >= CHINESE_CHARACTER_MIN && value <= CHINESE_CHARACTER_MAX;
	}

	/**
	 * Returns one '|'-separated field of the record stored for a character.
	 * 
	 * @param value
	 *            character to look up
	 * @param fieldIndex
	 *            zero-based position of the field inside the record
	 * @return the field, or {@code ""} when the character is outside the
	 *         dictionary range, has no record, or the record has no such field
	 */
	private static String lookupField(char value, int fieldIndex) {
		if (!isChineseCharacter(value)) {
			return "";
		}
		String record = CHINESE_CHARACTER_DATA[value - CHINESE_CHARACTER_MIN];
		if (record == null) {
			return "";
		}
		// Limit -1 keeps trailing empty fields so positions stay stable.
		String[] fields = record.split("\\|", -1);
		return fieldIndex < fields.length ? fields[fieldIndex] : "";
	}

	/**
	 * Removes the four pinyin tone diacritics (macron, acute, caron, grave)
	 * from a syllable. Other marks, such as the diaeresis of "u with
	 * diaeresis", are preserved.
	 * 
	 * @param pinyin
	 *            tone-marked syllable
	 * @return the same syllable without tone marks
	 */
	private static String stripToneMarks(String pinyin) {
		String decomposed = Normalizer.normalize(pinyin, Normalizer.Form.NFD);
		StringBuilder plain = new StringBuilder(decomposed.length());
		for (int i = 0; i < decomposed.length(); i++) {
			char c = decomposed.charAt(i);
			// Combining grave, acute, macron and caron (U+0300, U+0301, U+0304, U+030C).
			if (c == '\u0300' || c == '\u0301' || c == '\u0304' || c == '\u030C') {
				continue;
			}
			plain.append(c);
		}
		return Normalizer.normalize(plain, Normalizer.Form.NFC);
	}

	/**
	 * Returns every reading (pinyin) of a character; a character with several
	 * pronunciations yields several entries.
	 * <p>
	 * NOTE(review): the pinyin field is read as a comma-separated list of
	 * readings. The plain-letter form is derived here by removing the tone
	 * marks, because the previous per-entry lookup could never succeed; confirm
	 * against the layout of the data file.
	 * 
	 * @param value
	 *            character to look up
	 * @param useSpellFormat
	 *            {@code true} for the spelling as stored in the dictionary,
	 *            {@code false} for the spelling with tone marks removed
	 * @return the readings in dictionary order; empty when the character is
	 *         outside the dictionary range or has no pinyin data
	 */
	public static List<String> getPinyingByCharacter(char value, boolean useSpellFormat) {
		List<String> list = new ArrayList<>();
		String pinyinData = lookupField(value, INDEX_PINYIN);
		if (pinyinData.isEmpty()) {
			return list;
		}
		for (String pinyin : pinyinData.split(",")) {
			list.add(useSpellFormat ? pinyin : stripToneMarks(pinyin));
		}
		return list;
	}

	/**
	 * Converts a string to pinyin. Each Chinese character is replaced by its
	 * first reading; all other characters are copied unchanged. A single space
	 * separates a syllable from adjacent non-whitespace text, and no trailing
	 * space is produced.<br>
	 * Limitation: readings are chosen per character, not per word, so a
	 * character with several pronunciations may get the wrong one in context.
	 * 
	 * @param value
	 *            text to convert, may be {@code null}
	 * @param useSpellFormat
	 *            {@code true} for the spelling as stored in the dictionary,
	 *            {@code false} for the spelling with tone marks removed
	 * @return the converted text, or {@code ""} when {@code value} is
	 *         {@code null}
	 */
	public static String getPinyingByCharacter(String value, boolean useSpellFormat) {
		if (value == null) {
			return "";
		}
		StringBuilder sb = new StringBuilder();
		// True while the last thing written was a syllable that still needs a separator.
		boolean afterPinyin = false;
		for (char ch : value.toCharArray()) {
			boolean converted = false;
			if (isChineseCharacter(ch)) {
				List<String> pinyinList = getPinyingByCharacter(ch, useSpellFormat);
				if (!pinyinList.isEmpty()) {
					if (sb.length() > 0 && !Character.isWhitespace(sb.charAt(sb.length() - 1))) {
						sb.append(' ');
					}
					sb.append(pinyinList.get(0));
					afterPinyin = true;
					converted = true;
				}
			}
			if (!converted) {
				// Characters without a reading are kept as they are.
				if (afterPinyin && !Character.isWhitespace(ch)) {
					sb.append(' ');
				}
				sb.append(ch);
				afterPinyin = false;
			}
		}
		return sb.toString();
	}
	
	/**
	 * Returns the radical (component) of a character.
	 * 
	 * @param value
	 *            character to look up
	 * @return the radical, or {@code ""} when none is available
	 */
	public static String getComponentByCharacter(char value) {
		return lookupField(value, INDEX_CHARACTER_COMPONENT);
	}
	
	/**
	 * Returns the radical (component) of the first character of a string.
	 * 
	 * @param value
	 *            text whose first character is looked up, may be {@code null}
	 * @return the radical, or {@code ""} when the text is {@code null} or empty
	 *         or no radical is available
	 */
	public static String getComponentByCharacter(String value) {
		if (value == null || value.isEmpty()) {
			return "";
		}
		return getComponentByCharacter(value.charAt(0));
	}
	
	/**
	 * Returns the stroke sequence of the first character of a string, for
	 * example "5153154" for the character U+5F20 (zhang). The digits 1 to 5
	 * stand for horizontal, vertical, left-falling, right-falling and turning
	 * strokes.
	 * 
	 * @param value
	 *            text whose first character is looked up, may be {@code null}
	 * @return the stroke sequence, or {@code ""} when the text is {@code null}
	 *         or empty or no stroke data is available
	 */
	public static String getStrokesByCharacter(String value) {
		if (value == null || value.isEmpty()) {
			return "";
		}
		return getStrokesByCharacter(value.charAt(0));
	}
	
	/**
	 * Returns the stroke sequence of a character, for example "5153154" for
	 * the character U+5F20 (zhang). The digits 1 to 5 stand for horizontal,
	 * vertical, left-falling, right-falling and turning strokes.
	 * 
	 * @param value
	 *            character to look up
	 * @return the stroke sequence, or {@code ""} when no stroke data is
	 *         available
	 */
	public static String getStrokesByCharacter(char value) {
		return lookupField(value, INDEX_STROKES);
	}

	/**
	 * Small demo: prints the pinyin, radical and stroke sequence of one
	 * character.
	 * 
	 * @param args
	 *            ignored
	 */
	public static void main(String[] args) {
		System.out.println(getPinyingByCharacter("张", true));
		System.out.println(getComponentByCharacter("张"));
		System.out.println(getStrokesByCharacter("张"));
	}
}
